This file may take some time to load at times. Please be patient!

Load packages and data

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggcorrplot)
library(htmltools)
library(ggpubr)
source("functions/functions.R")
data <-
  readRDS("data/gemeinderatswahlen_alldata.rds")

Create data set used in main RDD analysis.

# similar data set definition as in "Analysis.Rmd"
d_rdd_bivariate <- data %>%
  select(ln_gross_expenditure_pc, total_seats, inhabs_rel_to_cutoff, above_cutoff, inhabitants_treshold, year, ags, election_year, state, closest, exact_pop, kreisfreie_stadt) %>%
  drop_na(ln_gross_expenditure_pc, total_seats, inhabs_rel_to_cutoff)

Unvariate Statistics

data  %>%
  select(where(is.numeric)) %>%
  reshape2::melt(.) %>%
  ggplot(., aes(x = value)) + 
    facet_wrap(~variable, scales = "free") + 
    geom_histogram()
## No id variables; using all as measure variables
## Warning: attributes are not identical across measure variables; they will be
## dropped
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 8117217 rows containing non-finite values (stat_bin).

ggsave("plots/histograms.svg", width = 45, height = 45, units = "cm")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 8117217 rows containing non-finite values (stat_bin).

Bivariate descriptive statistics

Correlations

data %>%
  select(where(is.numeric)) %>%
  cor(., use = "pairwise.complete.obs") %>%
  ggcorrplot::ggcorrplot(
    .,
    colors = c("red", "white", "blue"),
    #lab = TRUE,
    digits = 1
    )
## Warning in cor(., use = "pairwise.complete.obs"): the standard deviation is zero

ggsave("plots/bivariate_correlations.svg", width = 45, height = 45, units = "cm")

Cutoffs

Define cutoffs for each state, in some cases depending on years (if reforms took place) and whether town is muinicipality (“Gemeinde”) or “district-free city” (“kreisfreie Stadt”).

“th” stands for threshold.

th_1_muni_since_2012 <- c(70, 200, 750, 1250, 2500, 5000, 10000, 15000, 25000, 35000, 45000)
th_1_muni_before_2012 <- c(70, 200, 750, 1250, 2000, 5000, 10000, 15000, 25000, 35000, 45000)
th_1_krefr <- c(150000)
#th_3 <- c(500, 1000, 2000, 3000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 15000, 20000, 25000, 30000, 40000, 50000, 75000, 100000, 125000, 150000, 175000, 200000, 250000, 300000, 350000, 400000, 500000, 600000)
th_5 <- c(5000, 8000, 15000, 30000, 50000, 100000, 250000, 400000, 550000, 700000)
th_6 <- c(3000, 5000, 10000, 25000, 50000, 100000, 250000, 500000, 1000000)
th_7 <- c(300, 500, 1000, 2500, 5000, 7500, 10000, 15000, 20000, 30000, 40000, 60000, 80000, 100000, 150000)
th_8 <- c(1000, 2000, 3000, 5000, 10000, 20000, 30000, 50000, 150000, 400000)
th_9 <- c(1000, 2000, 3000, 5000, 10000, 20000, 30000, 50000, 100000, 200000, 500000) # Except Nuremburg and Munich
th_10 <- c(10000, 20000, 30000, 40000, 60000, 100000)
th_12_muni_since_2008 <- c(700, 1500, 2500, 5000, 10000, 15000, 25000, 35000, 45000)
#th_12_muni_before_2008 <- c(200, 700, 1500, 2500, 5000, 10000, 15000, 25000, 35000, 45000)
th_12_krefr <- c(100000, 150000)
th_13 <- c(500, 1000, 1500, 3000, 4500, 6000, 7500, 10000, 20000, 30000, 50000, 75000, 100000, 150000)
th_14 <- c(500, 1000, 2000, 3000, 5000, 10000, 20000, 30000, 40000, 50000, 60000, 80000, 150000, 400000)
th_15_since_2014 <- c(1000, 2000, 3000, 5000, 10000, 20000, 30000, 50000, 150000, 300000)
th_15_before_2014 <- c(100, 500, 1000, 2000, 3000, 5000, 10000, 20000, 30000, 50000, 150000, 300000)
th_16 <- c(500, 1000, 2000, 3000, 5000, 10000, 20000, 30000, 50000, 100000, 200000)

Cutoff rule changes

01 Schleswig-Holstein (with “2002” representing the 1998 election): You can clearly see municipalities with 2000-2500 inhabitants “overlapping” in terms of total seats (compare 2018 to prior elections):

data %>%
  filter(state == 1 & exact_pop > 1250 & exact_pop <= 5000 & (election_year == 1 | year == 2002)) %>%
  ggplot() +
  geom_point(aes(x = exact_pop, y = total_seats, colour = as.factor(year)))

12 Brandenburg – population data only for 2019:

data %>%
  filter(state == 12 & exact_pop <= 700 & election_year == 1) %>%
  ggplot() +
  geom_point(aes(x = exact_pop, y = total_seats, colour = as.factor(year)))

15 Saxony-Anhalt (with “2002” representing the 1999 election): After redistricting in 2007, municipalities are larger than before (and far less exist).

data %>%
  filter(state == 15 & exact_pop <= 1000 & (election_year == 1 | year == 2002)) %>%
  ggplot() +
  geom_point(aes(x = exact_pop, y = total_seats, colour = as.factor(year)))

data %>%
  filter(state == 15 & exact_pop <= 1000 & (election_year == 1 | year == 2002)) %>%
  group_by(year) %>%
  summarize(mean = mean(exact_pop), median = median(exact_pop), n = n())

Inhabitants vs. council size for each state separately

States with no changes in cutoffs and no distinction between municipalities and “district-free cities” only (i. e. 01, 12, 15 excluded). 03 Lower Saxony excluded due to missing data. Vertical bars indicate cutoffs.

plot_list <- htmltools::tagList()
for (s in c(5:10, 13, 14, 16)) {
  df_subset <- data %>%
    filter(state == s & (election_year == 1 | year == 2002))
  correlation <-
    cor(df_subset$exact_pop, df_subset$total_seats, use = "pairwise.complete.obs") %>%
    round(., 2)
  plot <- df_subset %>%
    ggplot() +
    geom_vline(xintercept = get(paste0("th_", s))) +
    geom_point(aes(
      x = exact_pop,
      y = total_seats,
      colour = as.factor(year),
      text = paste("town:", town)
    )) +
    labs(title = paste0("State: ", s, "; linear correlation: ", correlation)) +
    scale_x_log10()
  plot_list[[s]] <- plotly::ggplotly(plot)
}
## Warning: Ignoring unknown aesthetics: text
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
plot_list

We observe some countries where cutoffs play a huge role (06 Hesse, 07 Rhineland-Palantine, 09 Bavaria, 10 Saarland, 16 Thuringia). In other states they do not and total_seats spreads more (05 North Rhine-Westphalia, 08 Baden-Württemberg, 14 Sachsen-Anhalt).

“Special” states:

Red vertical lines indicate a cutoff that did not exist over the entire time period.

# 01 Schleswig-Holstein
df_subset <- data %>%
  filter(state == 1 & (election_year == 1 | year == 2002) & kreisfreie_stadt == 0)
correlation <-
  cor(df_subset$exact_pop, df_subset$total_seats, use = "pairwise.complete.obs") %>%
  round(., 2)
plot <- df_subset %>%
  ggplot() +
  geom_vline(xintercept = th_1_muni_since_2012) +
  geom_vline(xintercept = c(2000, 2500), colour = "red") +
  geom_point(aes(
    x = exact_pop,
    y = total_seats,
    colour = as.factor(year),
    text = paste("town:", town)
  )) +
  labs(title = paste0("State: ", 1, "; linear correlation: ", correlation, "; no kreisfreie Städte")) +
  scale_x_log10()
## Warning: Ignoring unknown aesthetics: text
plotly::ggplotly(plot)
# 12 Brandenburg
df_subset <- data %>%
  filter(state == 12 & (election_year == 1 | year == 2002) & kreisfreie_stadt == 0)
correlation <-
  cor(df_subset$exact_pop, df_subset$total_seats, use = "pairwise.complete.obs") %>%
  round(., 2)
plot <- df_subset %>%
  ggplot() +
  geom_vline(xintercept = th_12_muni_since_2008) +
  geom_point(aes(
    x = exact_pop,
    y = total_seats,
    colour = as.factor(year),
    text = paste("town:", town)
  )) +
  labs(title = paste0("State: ", 12, "; linear correlation: ", correlation, "; no kreisfreie Städte")) +
  scale_x_log10()
## Warning: Ignoring unknown aesthetics: text
plotly::ggplotly(plot)
# 15 Saxony-Anhalt
df_subset <- data %>%
  filter(state == 15 & (election_year == 1 | year == 2002))
correlation <-
  cor(df_subset$exact_pop, df_subset$total_seats, use = "pairwise.complete.obs") %>%
  round(., 2)
plot <- df_subset %>%
  ggplot() +
  geom_vline(xintercept = th_15_before_2014, colour = "red") +
  geom_vline(xintercept = th_15_since_2014) +
  geom_point(aes(
    x = exact_pop,
    y = total_seats,
    colour = as.factor(year),
    text = paste("town:", town)
  )) +
  labs(title = paste0("State: ", 15, "; linear correlation: ", correlation)) +
  scale_x_log10()
## Warning: Ignoring unknown aesthetics: text
plotly::ggplotly(plot)

12 Brandenburg also has quite a strong relationship but no population data beyond 2019. 15 Saxony-Anhalt may have had some special rules after 2007 county reform (to do: investigate)!

Export to LaTeX

For 01, cutoff change during observation period.

to do: First year shown in plot not yet correct.

# Create data
th_1 <- c(70, 200, 750, 1250, 5000, 10000, 15000, 25000, 35000, 45000)
th_1_change <- c(2000, 2500)
th_1 <- c(70, 200, 750, 1250, 2000, 5000, 10000, 15000, 25000, 35000, 45000)
th_15 <- c(1000, 2000, 3000, 5000, 10000, 20000, 30000, 50000, 150000, 300000)
th_15_change <- c(100, 500)
th_15 <- c(th_15, th_15_change)
plot_list <- list()
plot_df <- d_rdd_bivariate %>%
  mutate(
    year = case_when(
      state == 1 & year == 2002 ~ 1998,
      state %in% c(5, 7, 8, 10, 13, 14, 16) & year == 2002 ~ 1999,
      state == 6 & year == 2002 ~ 2001,
      TRUE ~ year
    ),
    year = as.factor(year)
    )
# Plot
for (s in c(1, 5:10, 13, 14, 15, 16)) {
  # Specific rules for "kreisfreie Städte" in 01
  df_subset <- plot_df
  if (s == 1) {
    df_subset <- df_subset %>% filter(kreisfreie_stadt == 0)
  }
  plot_list[[s]] <- df_subset %>%
    filter(state == s) %>%
    filter((election_year == 1 | as.numeric(year) == min(as.numeric(year)))) %>%
    ggplot() +
    geom_vline(xintercept = get(paste0("th_", s))) +
    geom_point(aes(
      x = exact_pop,
      y = total_seats#,
      #colour = year
    ),
    size = 0.5) +
    #labs(title = paste0("State: ", s)) +
    scale_x_log10() +
    theme(
      axis.title.x=element_blank(), # remove x axis label
      axis.title.y=element_blank(), # remove y axis label
      legend.title=element_blank() # remove legend title
      )
  # Cutoff changes
  # if (s == 1 | s == 15) {
  #   plot_list[[s]] <- plot_list[[s]] +
  #     geom_vline(xintercept = get(paste0("th_", s, "_change")),
  #                colour = "#F8766D", linetype = "dashed")
  # }
  # Show plots
  #print(plot_list[[s]])
}
rm(df_subset, th_1, th_1_change, th_15, th_15_change)

Arrange plots:

first_stage <- ggpubr::ggarrange(
  plot_list[[1]], plot_list[[5]], plot_list[[6]], plot_list[[7]], plot_list[[8]],
  plot_list[[9]], plot_list[[10]], plot_list[[13]], plot_list[[14]], plot_list[[15]], plot_list[[16]],
  # labels = c("SH", "NW", "Hesse", "Rhineland-Palatinate", "Baden-Württemberg",
  #            "Bavaria", "Saarland", "Mecklenburg-Vorpommern", "Saxony", "Thuringia"),
  labels = paste(" ", c("SH", "NW", "HE", "RP", "BW", "BY", "SL", "MV", "SN", "ST", "TH")),
  #label.x = "Population",
  #label.y = "Council size",
  #common.legend = TRUE,
  ncol = 2, nrow = 6) %>%
  annotate_figure(
    # See https://github.com/kassambara/ggpubr/issues/78
    left = text_grob("Council size", rot = 90),
    bottom = text_grob("Population")
)
first_stage

Export plots:

ggpubr::ggexport(first_stage, filename = "plots/rdd_first_stage.pdf")
## file saved to plots/rdd_first_stage.pdf

“Expected” number of seats vs. actual seats number

Expected is what I deemed the size of the local parliament based on laws and ignoring voluntary reductions as well as “overhang seats”

df_subset <- data %>%
  filter((election_year == 1 | year == 2002))
correlation <-
  cor(df_subset$by_law_seats, df_subset$total_seats, use = "pairwise.complete.obs") %>%
  round(., 2)
plot <- df_subset %>%
  ggplot() +
  #geom_vline(xintercept = th_1_muni_since_2012) +
  #geom_vline(xintercept = c(2000, 2500), colour = "red") +
  geom_point(aes(
    x = by_law_seats,
    y = total_seats,
    colour = as.factor(state),
    text = paste("town:", town, "<br>year:", year)
  )) +
  labs(title = paste0("All states", "; linear correlation: ", correlation))
## Warning: Ignoring unknown aesthetics: text
plotly::ggplotly(plot)

Inhabitants vs. council size for all states

Linear difference:

df_subset <- data %>%
  filter(election_year == 1 | year == 2002)
correlation <-
  cor(df_subset$exact_pop, df_subset$total_seats, use = "pairwise.complete.obs") %>%
  round(., 2)
plot <- df_subset %>%
  ggplot() +
  geom_vline(xintercept = 0) +
  geom_point(aes(
    x = diff_to_cutoff,
    y = total_seats,
    colour = as.factor(state),
    #shape = as.factor(state),
    text = paste("town:", town, "<br>year:", year)
  )) +
  labs(title = paste0("All states; linear correlation: ", correlation))
## Warning: Ignoring unknown aesthetics: text
plotly::ggplotly(plot)

Using the Egger and Koethenbuerger (2010) calculation steps: \[\tilde{N_i} = N_i/N_d\] with \(N_i\) as the relevant actual population size and \(N_d\) as the respective thresholds.

df_subset <- data %>%
  filter(election_year == 1 | year == 2002)
correlation <-
  cor(df_subset$inhabs_rel_to_cutoff, df_subset$total_seats, use = "pairwise.complete.obs") %>%
  round(., 2)
plot <- df_subset %>%
  ggplot() +
  geom_vline(xintercept = 0) +
  geom_point(aes(
    x = inhabs_rel_to_cutoff,
    y = total_seats,
    colour = as.factor(state),
    #shape = as.factor(state),
    text = paste("town:", town, "<br>year:", year)
  )) +
  labs(title = paste0("All states; linear correlation: ", correlation))
## Warning: Ignoring unknown aesthetics: text
plotly::ggplotly(plot)

Inhabitants and public spending for each state separately

Relationship between inhabitants and public spending for those states that did not change cutoffs:

plot_list <- htmltools::tagList()
for (s in c(5:10, 13, 14, 16)) {
  df_subset <- data %>%
    filter(state == s & (election_year == 1 | year == 2002))
  correlation <-
    cor(df_subset$exact_pop, df_subset$ln_gross_expenditure_pc, use = "pairwise.complete.obs") %>%
    round(., 2)
  plot <- df_subset %>%
    ggplot() +
    geom_vline(xintercept = get(paste0("th_", s))) +
    geom_point(aes(
      x = exact_pop,
      y = ln_gross_expenditure_pc,
      colour = as.factor(year),
      text = paste("town:", town)
    )) +
    labs(title = paste0("State: ", s, "; linear correlation: ", correlation)) +
    scale_x_log10() +
    scale_y_log10()
  plot_list[[s]] <- plotly::ggplotly(plot)
}
## Warning: Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
plot_list
rm(s, plot_list, plot, df_subset)

Relationship between inhabitants and public spending per capita for those states that did not change cutoffs:

plot_list <- htmltools::tagList()
for (s in c(5:10, 13, 14, 16)) {
  df_subset <- data %>%
    filter(state == s & (election_year == 1 | year == 2002))
  correlation <-
    cor(df_subset$exact_pop, df_subset$ln_gross_expenditure_pc, use = "pairwise.complete.obs") %>%
    round(., 2)
  plot <- df_subset %>%
    ggplot() +
    geom_vline(xintercept = get(paste0("th_", s))) +
    geom_point(aes(
      x = exact_pop,
      y = ln_gross_expenditure_pc,
      colour = as.factor(year),
      text = paste("town:", town)
    )) +
    labs(title = paste0("State: ", s, "; linear correlation: ", correlation)) +
    scale_x_log10()
  plot_list[[s]] <- plotly::ggplotly(plot)
}
## Warning: Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
plot_list
rm(s, plot_list, plot, df_subset)